In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

NumPy


In [5]:
## numpy array
a = np.array([1, 4, 6])
print a.shape
print 
print np.ones((3, 4))
print
print np.zeros((2, 5))
print
print np.arange(6).reshape(2, 3)
print
print a.T
print
print np.hstack([a, a])
print
print np.vstack([a, a])


(3L,)

[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]

[[ 0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]]

[[0 1 2]
 [3 4 5]]

[1 4 6]

[1 4 6 1 4 6]

[[1 4 6]
 [1 4 6]]

In [6]:
## matrix (dot) product vs. element-wise multiplication
print np.dot(a, a)  # or a.dot(a)
print 
print a*a


53

[ 1 16 36]

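The same distinction holds for 2-d arrays: dot gives the matrix product, * multiplies entry by entry. A minimal sketch (the matrix m2 is illustrative):

m2 = np.array([[1, 2], [3, 4]])
print m2.dot(m2)   # matrix product:  [[ 7 10] [15 22]]
print
print m2 * m2      # element-wise:    [[ 1  4] [ 9 16]]
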
In [7]:
# you can convert a 1-d array to a 2-d array with np.newaxis
print 'a:'
print a
print 'a.shape:', a.shape
print 
print 'a[np.newaxis] is a 2-d row vector:'
print a[np.newaxis]
print 'a[np.newaxis].shape:', a[np.newaxis].shape
print

print 'a[np.newaxis].T is a 2-d column vector:'
print a[np.newaxis].T
print 'a[np.newaxis].T.shape:', a[np.newaxis].T.shape
print


a:
[1 4 6]
a.shape: (3L,)

a[np.newaxis] is a 2-d row vector:
[[1 4 6]]
a[np.newaxis].shape: (1L, 3L)

a[np.newaxis].T is a 2-d column vector:
[[1]
 [4]
 [6]]
a[np.newaxis].T.shape: (3L, 1L)

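Equivalently, np.newaxis can be placed on a specific axis, and reshape(-1, 1) does the same job (-1 tells numpy to infer that dimension); a quick sketch:

print a[:, np.newaxis]         # same 2-d column vector as a[np.newaxis].T
print a.reshape(-1, 1).shape   # a (3, 1) column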

In [8]:
# numpy provides a ton of other functions for working with matrices
m = np.array([[1, 2],[3, 4]])
m_inverse = np.linalg.inv(m)
print 'inverse of [[1, 2],[3, 4]]:'
print m_inverse
print

print 'm.dot(m_inverse):'
print m.dot(m_inverse)


inverse of [[1, 2],[3, 4]]:
[[-2.   1. ]
 [ 1.5 -0.5]]

m.dot(m_inverse):
[[  1.00000000e+00   1.11022302e-16]
 [  0.00000000e+00   1.00000000e+00]]

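If the inverse is only a means to solving a linear system, np.linalg.solve is the usual (and numerically more stable) route; a sketch with an illustrative right-hand side:

rhs = np.array([5., 6.])
x = np.linalg.solve(m, rhs)   # solves m.dot(x) == rhs without forming m_inverse
print x                       # same result as m_inverse.dot(rhs)
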
In [9]:
# and for all kinds of scientific computing, like generating random numbers:
np.random.seed(5678)
n = np.random.randn(3, 4)
print 'a matrix with random entries drawn from a Normal(0, 1) distribution:'
print n


a matrix with random entries drawn from a Normal(0, 1) distribution:
[[-0.70978938 -0.01719118  0.31941137 -2.26533107]
 [-1.37745366  1.94998073 -0.56381007 -0.84373759]
 [ 0.22453858 -0.39137772  0.60550347 -0.68615034]]

In [10]:
np.random.seed(3333)
n_data = 10 # number of data points, i.e. N
n_dim = 5   # number of dimensions of each data point, i.e. D

betas = np.random.randn(n_dim + 1)

X_no_constant = np.random.randn(n_data, n_dim)
print 'X_no_constant:'
print X_no_constant
print 

# prepend a column of ones (the intercept term) and compute y = X.dot(betas)
X = np.hstack([np.ones(n_data)[np.newaxis].T, X_no_constant])
y = np.dot(X, betas)

# Tests:
y_expected = np.array([-0.41518357, -9.34696153, 5.08980544, 
                       -0.26983873, -1.47667864, 1.96580794, 
                       6.87009791, -2.07784135, -0.7726816, 
                       -2.74954984])
np.testing.assert_allclose(y, y_expected)
print '****** Tests passed! ******'


X_no_constant:
[[-0.92232935  0.27352359 -0.86339625  1.43766044 -1.71379871]
 [ 0.179322   -0.89138595  2.13005603  0.51898975 -0.41875106]
 [ 0.34010119 -1.07736609 -1.02314142 -1.02518535  0.40972072]
 [ 1.18883814  1.01044759  0.3108216  -1.17868611 -0.49526331]
 [-1.50248369 -0.196458    0.34752922 -0.79200465 -0.31534705]
 [ 1.73245191 -1.42793626 -0.94376587  0.86823495 -0.95946769]
 [-1.07074604 -0.06555247 -2.17689578  1.58538804  1.81492637]
 [-0.73706088  0.77546031  0.42653908 -0.51853723 -0.53045538]
 [ 1.09620536 -0.69557321  0.03080082  0.25219596 -0.35304303]
 [-0.93971165  0.04448078  0.04273069  0.4961477  -1.7673568 ]]

****** Tests passed! ******
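
As an aside, np.column_stack is an equivalent, arguably more readable way to prepend the intercept column; a quick sketch:

X_alt = np.column_stack([np.ones(n_data), X_no_constant])
np.testing.assert_allclose(X, X_alt)   # identical to the hstack/newaxis version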

Pandas


In [11]:
b = np.array([[6, 7], [3, 1], [4, 0]])
df = pd.DataFrame(data=b,  columns=['Weight', 'Height'])
print 'b:'
print b
print 
print 'DataFrame version of b:'
print df
print


b:
[[6 7]
 [3 1]
 [4 0]]

DataFrame version of b:
   Weight  Height
0       6       7
1       3       1
2       4       0

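Columns are selected by name (each column is a pandas Series), and new columns can be added by assignment; a small sketch where the 'Ratio' column is purely illustrative:

print df['Weight']
df['Ratio'] = df['Height'] * 1.0 / df['Weight']   # *1.0 forces float division under Python 2
print df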

In [12]:
baseball = pd.read_csv('data/baseball.dat.txt')

In [16]:
# some handy ways to get a first look at a DataFrame:
# baseball.head()
# baseball.describe()
# baseball.keys()
# baseball.info()

In [18]:
millionaire_indices = baseball['Salary'] > 1000
# you can use the boolean mask to look at a subset of your original dataframe
print 'baseball.shape:', baseball.shape
print "baseball[millionaire_indices].shape:", baseball[millionaire_indices].shape
baseball[millionaire_indices][['Salary', 'AVG', 'Runs', 'Name']].head()


baseball.shape: (337, 18)
baseball[millionaire_indices].shape: (139, 18)
Out[18]:
Salary AVG Runs Name
0 3300 0.272 69 Andre Dawson
1 2600 0.269 58 Steve Buchele
2 2500 0.249 54 Kal Daniels
3 2475 0.260 59 Shawon Dunston
4 2313 0.273 87 Mark Grace

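Boolean masks can be combined with & and | (each comparison needs its own parentheses); a sketch using columns from this dataset:

rich_good_hitters = baseball[(baseball['Salary'] > 1000) & (baseball['AVG'] > .300)]
print rich_good_hitters.shape
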
In [19]:
shoe_size_df = pd.read_csv('data/baseball2.dat.txt')
shoe_size_df.shape


Out[19]:
(3, 2)

In [20]:
merged = pd.merge(baseball, shoe_size_df, on=['Name'])
merged


Out[20]:
Salary AVG OBP Runs Hits Doubles Triples HR RBI Walks SO SB Errs free agency eligibility free agent in 1991/2 arbitration eligibility arbitration in 1991/2 Name Shoe Size
0 3300 0.272 0.302 69 153 21 4 31 104 22 80 4 3 1 0 0 0 Andre Dawson 11
1 2313 0.273 0.346 87 169 28 5 8 58 70 53 3 8 0 0 1 0 Mark Grace 13
2 200 0.203 0.240 39 64 10 1 10 33 14 96 13 6 0 0 0 0 Sammy Sosa 12

In [23]:
merged_outer = pd.merge(baseball, shoe_size_df, on=['Name'], how='outer')
merged_outer.head()


Out[23]:
Salary AVG OBP Runs Hits Doubles Triples HR RBI Walks SO SB Errs free agency eligibility free agent in 1991/2 arbitration eligibility arbitration in 1991/2 Name Shoe Size
0 3300 0.272 0.302 69 153 21 4 31 104 22 80 4 3 1 0 0 0 Andre Dawson 11
1 2600 0.269 0.335 58 111 17 2 18 66 39 69 0 3 1 1 0 0 Steve Buchele NaN
2 2500 0.249 0.337 54 115 15 1 17 73 63 116 6 5 1 0 0 0 Kal Daniels NaN
3 2475 0.260 0.292 59 128 22 7 12 50 23 64 21 21 0 0 1 0 Shawon Dunston NaN
4 2313 0.273 0.346 87 169 28 5 8 58 70 53 3 8 0 0 1 0 Mark Grace 13
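
Besides 'outer', how='left' keeps every row of the left frame (and how='right' every row of the right one); a quick sketch, assuming names are unique in shoe_size_df:

merged_left = pd.merge(baseball, shoe_size_df, on=['Name'], how='left')
print merged_left.shape   # all 337 baseball rows, with NaN shoe sizes where unmatched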

Plotting with Matplotlib


In [25]:
baseball = pd.read_csv('data/baseball.dat.txt')

In [26]:
f = plt.figure()
plt.hist(baseball['Hits'], bins=15)   # plot or scatter
plt.xlabel('Number of Hits')
plt.ylabel('Frequency')
plt.title('Histogram of Number of Hits')
f.set_size_inches(10, 5)
plt.show()

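A scatter plot follows the same pattern; a sketch plotting Salary against Hits (two columns of this dataset):

f = plt.figure()
plt.scatter(baseball['Hits'], baseball['Salary'])
plt.xlabel('Number of Hits')
plt.ylabel('Salary')
plt.title('Salary vs. Number of Hits')
f.set_size_inches(10, 5)
plt.show()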

Scikit-Learn Linear Regression


In [31]:
from sklearn import linear_model

In [32]:
## linear regression models
model_lr = linear_model.LinearRegression()
model_ridge = linear_model.Ridge(alpha=1)
model_lasso = linear_model.Lasso(alpha=1)
model_en = linear_model.ElasticNet(alpha=0.5, l1_ratio=0.1)

In [27]:
def mean_squared_error(y_true, y_pred):
    """
    calculate the mean_squared_error given a vector of true ys and a vector of predicted ys
    """
    diff = y_true - y_pred
    return np.dot(diff, diff) / len(diff)

def predict_test_values(model, X_train, y_train, X_test):
    model.fit(X_train, y_train)
    return model.predict(X_test)
    

def calc_train_and_test_error(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    return mean_squared_error(y_train, y_pred_train), mean_squared_error(y_test, y_pred_test)

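A quick sanity check of mean_squared_error on hand-computable values (the numbers are illustrative):

print mean_squared_error(np.array([1., 2., 3.]), np.array([1., 2., 5.]))   # (0 + 0 + 4) / 3
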
In [33]:
# load overfitting data
with np.load('data/overfitting_data.npz') as data:
    x_train = data['x_train']
    y_train = data['y_train']
    x_test = data['x_test']
    y_test = data['y_test']

In [34]:
## Model performance

print "Linear Regression Training and Test Errors:"
print calc_train_and_test_error(model_lr, x_train, y_train, x_test, y_test)
print

print "Ridge Regression Training and Test Errors:"
print calc_train_and_test_error(model_ridge, x_train, y_train, x_test, y_test)
print

print "Lasso Regression Training and Test Errors:"
print calc_train_and_test_error(model_lasso, x_train, y_train, x_test, y_test)
print

print 'ElasticNet Training and Test Errors:'
print calc_train_and_test_error(model_en, x_train, y_train, x_test, y_test)
print


Linear Regression Training and Test Errors:
(2.4835421623899702e-05, 283.52728792173116)

Ridge Regression Training and Test Errors:
(0.018634112597992421, 9.5641560683730305)

Lasso Regression Training and Test Errors:
(4.1142351854727677, 4.6028697944107098)

ElasticNet Training and Test Errors:
(1.9616145613107794, 3.8189893038857918)


In [35]:
n_disp_coefs = 10

print 'Linear Regression Coefficients:'
print model_lr.coef_[:n_disp_coefs]
print

print 'Ridge Regression Coefficients:'
print model_ridge.coef_[:n_disp_coefs]
print

print 'LASSO Coefficients:'
print model_lasso.coef_[:n_disp_coefs]
print

print 'ElasticNet Coefficients:'
print model_en.coef_[:n_disp_coefs]
print


Linear Regression Coefficients:
[  5.22757470e-01   2.78289824e+00   4.04383818e+00   1.17544241e+00
   3.13230537e-01  -1.28127160e-01   5.11682173e-01   3.83754833e-03
  -1.19481096e+00   9.56448172e-01]

Ridge Regression Coefficients:
[ 1.01611626  1.77246927  3.06534773 -0.0333898   0.04378713  0.10472107
 -0.13445823  0.12656315  0.05779722  0.10204281]

LASSO Coefficients:
[ 0.03375129  0.92694409  1.92659636  0.          0.          0.         -0.
  0.          0.          0.        ]

ElasticNet Coefficients:
[ 0.61034977  1.16675401  1.79600624  0.          0.          0.00686607
  0.          0.02027936  0.00469244  0.00644604]

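A compact way to see the sparsity that the L1 penalty induces is to count nonzero coefficients (a sketch; the counts depend on the fits above):

print 'Ridge nonzero coefficients:', np.sum(model_ridge.coef_ != 0)
print 'LASSO nonzero coefficients:', np.sum(model_lasso.coef_ != 0)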

In [36]:
print "Sum of Linear Regression Coefficients:"
print np.sum(np.abs(model_lr.coef_))
print

print "Sum of Ridge Regression Coefficients:"
print np.sum(np.abs(model_ridge.coef_))
print

print "Sum of Lasso Regression Coefficients:"
print np.sum(np.abs(model_lasso.coef_))
print

print 'Sum of ElasticNet Coefficients:'
print np.sum(np.abs(model_en.coef_))
print


Sum of Linear Regression Coefficients:
338.387469048

Sum of Ridge Regression Coefficients:
62.4912904062

Sum of Lasso Regression Coefficients:
2.88729174216

Sum of ElasticNet Coefficients:
9.82525057342

Model Selection

Types of Cross Validation

Validation Set Cross Validation


In [37]:
# sklearn provides a helper function for splitting off a validation set
from sklearn.cross_validation import train_test_split
validation_portion = 0.1
seed = 1234
x_train_small, x_valid, y_train_small, y_valid = \
    train_test_split(x_train, y_train, test_size=validation_portion, random_state=seed)

print 'Original Training Set Size:'
print x_train.shape, y_train.shape
print

print 'Reduced Training Set Size:'
print x_train_small.shape, y_train_small.shape
print

print 'Validation Set Size:'
print x_valid.shape, y_valid.shape
print


Original Training Set Size:
(600L, 598L) (600L,)

Reduced Training Set Size:
(540L, 598L) (540L,)

Validation Set Size:
(60L, 598L) (60L,)


In [38]:
def validation_set_error(model, x_train, y_train, validation_portion=0.1, seed=1234):
    # hold out a validation set, fit on the rest, and return the validation MSE

    x_train_small, x_valid, y_train_small, y_valid = \
        train_test_split(x_train, y_train, test_size=validation_portion, random_state=seed)
    model.fit(x_train_small, y_train_small)
    y_pred_valid = model.predict(x_valid)
    return mean_squared_error(y_valid, y_pred_valid)
      
    
# set up models
model_lr_valid = linear_model.LinearRegression()
model_ridge_valid = linear_model.Ridge(alpha=10)

# calculate errors
valid_portion = .1
n_seeds = 5
print "Linear Regression Training and Test Errors:"
print calc_train_and_test_error(model_lr_valid, x_train_small, y_train_small, x_test, y_test)

print
print "Linear Regression Validation Errors:"
print validation_set_error(model_lr_valid, x_train, y_train, validation_portion=0.1, seed=1234)
print 

for seed in range(n_seeds):
    print validation_set_error(model_lr_valid, x_train, y_train, validation_portion=valid_portion, seed=seed)
    print

print "Ridge Regression Training and Test Errors:"
print calc_train_and_test_error(model_ridge_valid, x_train_small, y_train_small, x_test, y_test)


print
print "Ridge Regression Validation Errors:"
print validation_set_error(model_ridge_valid, x_train, y_train, validation_portion=0.1, seed=1234)
print 

for seed in range(n_seeds):
    print validation_set_error(model_ridge_valid, x_train, y_train, validation_portion=valid_portion, seed=seed)
    print


Linear Regression Training and Test Errors:
(6.5894013208313341e-28, 9.6373710755996189)

Linear Regression Validation Errors:
9.36759564041

10.4039988935

11.6352333478

8.8241606146

9.20945551949

7.60088829288

Ridge Regression Training and Test Errors:
(0.037116269305341815, 4.8163269566646871)

Ridge Regression Validation Errors:
4.44120540399

3.61817500364

7.12476980873

5.32580668571

5.74292650031

4.6239411424

K-Fold Cross Validation


In [42]:
# scikit-learn provides a useful object to help you perform k-fold cross validation
from sklearn.cross_validation import KFold

n_data = len(y_train)
fold_count = 0
for train_reduced_row_ids, valid_row_ids in KFold(n_data, n_folds=4):
    print
    print 
    print "FOLD %d:" % fold_count
    print "-------"
    print("train_ids:\n%s\n\nvalid_ids\n%s" % (train_reduced_row_ids, valid_row_ids))
    x_train_reduced = x_train[train_reduced_row_ids]
    y_train_reduced = y_train[train_reduced_row_ids]
    x_valid = x_train[valid_row_ids]
    y_valid = y_train[valid_row_ids]
    fold_count += 1

In [43]:
# NOTE: KFold isn't random at all, so it's important to shuffle your data before using it.
from sklearn.utils import shuffle
x_train_shuffled, y_train_shuffled = shuffle(x_train, y_train)
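
Alternatively, KFold can shuffle the fold assignments itself; a sketch, assuming this version of sklearn.cross_validation.KFold accepts the shuffle and random_state arguments (later releases do):

for train_ids, valid_ids in KFold(n_data, n_folds=4, shuffle=True, random_state=1234):
    pass   # same loop body as above, with randomized fold membership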

In [44]:
def kfold_error(model, x_train, y_train, k=4, seed=1234):
    # shuffle training data
    x_train_shuffled, y_train_shuffled = shuffle(x_train, y_train, random_state=seed)
    
    n_data = len(y_train)
    error_sum = 0
    for train_reduced_row_ids, valid_row_ids in KFold(n_data, n_folds=k):
        x_train_reduced = x_train_shuffled[train_reduced_row_ids]
        y_train_reduced = y_train_shuffled[train_reduced_row_ids]
        x_valid = x_train_shuffled[valid_row_ids]
        y_valid = y_train_shuffled[valid_row_ids]
        model.fit(x_train_reduced, y_train_reduced)
        y_valid_pred = model.predict(x_valid)
        error_sum += mean_squared_error(y_valid, y_valid_pred)
    return error_sum*1.0 / k
    

# set up models
model_lr_valid = linear_model.LinearRegression()
model_ridge_valid = linear_model.Ridge(alpha=10)

# calculate errors
n_seeds = 3
k = 5

print "Linear Regression Training and Test Errors:"
print calc_train_and_test_error(model_lr_valid, x_train, y_train, x_test, y_test)

print
print "Linear Regression K-Fold Errors:"
print 
for seed in range(n_seeds):
    print kfold_error(model_lr_valid, x_train, y_train, k=k, seed=seed)
    print 

print
print "Ridge Regression Training and Test Errors:"
print calc_train_and_test_error(model_ridge_valid, x_train, y_train, x_test, y_test)


print
print "Ridge Regression K-Fold Errors:"
print 
for seed in range(n_seeds):
    print kfold_error(model_ridge_valid, x_train, y_train, k=k, seed=seed)
    print


Linear Regression Training and Test Errors:
(2.4835421623899702e-05, 283.52728792173116)

Linear Regression K-Fold Errors:

7.21045028087

7.3510411941

6.69216918868


Ridge Regression Training and Test Errors:
(0.064063243432624289, 4.9205415455726982)

Ridge Regression K-Fold Errors:

5.77769677178

5.78170553945

5.6587338965

Model and Hyperparameter Selection with Cross Validation


In [45]:
def model_name(model):
    s = str(model).lower()
    if "linearregression" in s:
        return 'LinearRegression'
    elif "lasso" in s:
        return 'Lasso(a=%g)' % model.alpha
    elif "ridge" in s:
        return 'Ridge(a=%g)' % model.alpha
    elif "elastic" in s:
        return 'ElasticNet(a=%g, r=%g)' % (model.alpha, model.l1_ratio)
    else:
        raise ValueError("Unknown Model Type")

def create_models(alphas=(.01, .03, .1, .3, 1, 3), l1_ratios=(.7, .5, .3)):
    models = [linear_model.LinearRegression()]
    models.extend([linear_model.Ridge(a) for a in alphas])
    models.extend([linear_model.Lasso(a) for a in alphas])
    models.extend([linear_model.ElasticNet(a, l1_ratio=l) for a in alphas for l in l1_ratios])
    return models

def results_df(models, betas_true, x_train, y_train, x_test, y_test, k=4):
    n_data, n_dim = x_train.shape

    n_zeros = n_dim - len(betas_true)
    
    betas_true = np.concatenate([betas_true, np.zeros(n_zeros)])
    
    # fit models to training data
    for m in models:
        m.fit(x_train, y_train)
    
    betas = np.vstack([betas_true] + [m.coef_ for m in models])
    beta_names = ['Beta ' + str(i) for i in range(n_dim)]

    # set up model names
    model_names =  ["True Coefs"] + [model_name(m) for m in models]
    df = pd.DataFrame(data=betas, columns=beta_names, index=model_names)

    # calculate training errors
    y_preds = [m.predict(x_train) for m in models]
    errors = [np.nan] + [mean_squared_error(y_train, y_pred) for y_pred in y_preds]
    df['Train Error'] = errors

    # calculate validation errors
    errors = [np.nan] + [kfold_error(m, x_train, y_train, k=k) for m in models]
    df['Cross Validation Error'] = errors

    # calculate test errors
    y_preds = [m.predict(x_test) for m in models]
    errors = [np.nan] + [mean_squared_error(y_test, y_pred) for y_pred in y_preds]
    df['Test Error'] = errors

    return df


# these are some of the magic parameters that I used to actually 
# generate the overfitting dataset
n_dim = 598
n_dim_meaningful = 3
n_dim_disp_extra = 2

# the actual betas used to generate the y values.  the rest were 0.
betas_true = np.arange(n_dim_meaningful) + 1

# create a whole bunch of untrained models
models = create_models(alphas=(.01, .03, .1, .3, 1), l1_ratios=(.9, .7, .5))

# compute coefficients plus train/CV/test errors for every model
all_results = results_df(models, betas_true, x_train, y_train, x_test, y_test, k=4)

# decide which columns we want to display
disp_cols = ["Beta " + str(i) for i in range(n_dim_meaningful + n_dim_disp_extra)] 
disp_cols += ['Train Error', 'Cross Validation Error', 'Test Error']

# display the results
all_results[disp_cols]


Out[45]:
Beta 0 Beta 1 Beta 2 Beta 3 Beta 4 Train Error Cross Validation Error Test Error
True Coefs 1.000000 2.000000 3.000000 0.000000 0.000000 NaN NaN NaN
LinearRegression 0.522757 2.782898 4.043838 1.175442 0.313231 0.000025 6.550726 8.569427
Ridge(a=0.01) 0.867059 2.290546 3.729941 0.570987 0.380292 0.001033 6.590630 8.592261
Ridge(a=0.03) 1.028546 2.023949 3.548358 0.237767 0.370161 0.002749 6.588766 8.590441
Ridge(a=0.1) 1.088696 1.847044 3.386562 0.012841 0.272410 0.005693 6.582288 8.584100
Ridge(a=0.3) 1.065763 1.788885 3.247394 -0.050784 0.142292 0.010186 6.564170 8.566235
Ridge(a=1) 1.016116 1.772469 3.065348 -0.033390 0.043787 0.018634 6.504991 8.506474
Lasso(a=0.01) 1.076240 1.956283 2.955116 0.000000 0.025530 0.213388 1.701747 1.849997
Lasso(a=0.03) 1.042435 1.941418 2.952131 0.000000 0.002460 0.526780 1.223279 1.196381
Lasso(a=0.1) 0.972258 1.869852 2.892761 -0.000000 0.000000 0.968836 1.024546 0.895046
Lasso(a=0.3) 0.764523 1.659750 2.677197 0.000000 0.000000 1.235803 1.264843 1.120443
Lasso(a=1) 0.033751 0.926944 1.926596 0.000000 0.000000 4.114235 4.166927 4.301203
ElasticNet(a=0.01, r=0.9) 1.073393 1.951365 2.947436 0.000000 0.027348 0.195976 1.760720 1.935114
ElasticNet(a=0.01, r=0.7) 1.066012 1.936668 2.931448 0.000000 0.031150 0.163034 1.919308 2.167257
ElasticNet(a=0.01, r=0.5) 1.046440 1.907626 2.908069 0.000000 0.029275 0.127897 2.191965 2.583608
ElasticNet(a=0.03, r=0.9) 1.042974 1.931661 2.936235 0.000000 0.008043 0.484044 1.261913 1.240859
ElasticNet(a=0.03, r=0.7) 1.038972 1.908386 2.901951 0.000000 0.014723 0.401288 1.361827 1.360261
ElasticNet(a=0.03, r=0.5) 1.026654 1.885132 2.862618 0.000000 0.020993 0.316085 1.547012 1.592804
ElasticNet(a=0.1, r=0.9) 0.971808 1.860572 2.872529 -0.000000 0.000000 0.960364 1.041134 0.906070
ElasticNet(a=0.1, r=0.7) 0.968971 1.841385 2.828162 -0.000000 0.000000 0.913444 1.092529 0.967566
ElasticNet(a=0.1, r=0.5) 0.961790 1.813670 2.765941 0.000000 0.000000 0.807426 1.215067 1.092389
ElasticNet(a=0.3, r=0.9) 0.771897 1.639918 2.624446 0.000000 0.000000 1.277930 1.307354 1.168137
ElasticNet(a=0.3, r=0.7) 0.785336 1.603633 2.528307 0.000000 0.000000 1.370249 1.400441 1.273228
ElasticNet(a=0.3, r=0.5) 0.797276 1.571245 2.442910 0.000000 0.000000 1.469114 1.512212 1.390285
ElasticNet(a=1, r=0.9) 0.125708 0.934637 1.835475 0.000000 0.000000 4.112881 4.189787 4.313581
ElasticNet(a=1, r=0.7) 0.265489 0.946058 1.698143 0.000000 0.000000 4.168660 4.233218 4.410287
ElasticNet(a=1, r=0.5) 0.366721 0.954115 1.599575 0.000000 0.000000 4.252089 4.309107 4.534828

In [46]:
# scikit-learn includes some functions that make cross validation easier
# and computationally faster for some models
from sklearn import linear_model
model_ridge_cv = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0])
model_lasso_cv = linear_model.LassoCV(alphas=[0.1, 1.0, 10.0])
model_en_cv = linear_model.ElasticNetCV(l1_ratio=[.9], n_alphas=100)

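A sketch of how these might be used: after fitting, the cross-validated choice of regularization strength is exposed as the alpha_ attribute (per scikit-learn's trailing-underscore convention for fitted parameters):

model_ridge_cv.fit(x_train, y_train)
print 'alpha chosen by RidgeCV:', model_ridge_cv.alpha_

model_lasso_cv.fit(x_train, y_train)
print 'alpha chosen by LassoCV:', model_lasso_cv.alpha_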